--- Haskell compatibility
module Data.Char where

import frege.Prelude hiding (ord, chr, isNumber)
import frege.prelude.PreludeText

private type C = Char

--- The general categories for unicode characters and code points.
data GeneralCategory = UNASSIGNED | UPPERCASE_LETTER | LOWERCASE_LETTER
                     | TITLECASE_LETTER | MODIFIER_LETTER | OTHER_LETTER
                     | NON_SPACING_MARK | ENCLOSING_MARK | COMBINING_SPACING_MARK
                     | DECIMAL_DIGIT_NUMBER | LETTER_NUMBER | OTHER_NUMBER
                     | SPACE_SEPARATOR | LINE_SEPARATOR | PARAGRAPH_SEPARATOR
                     | CONTROL | FORMAT | CATEGORY17 | PRIVATE_USE | SURROGATE
                     | DASH_PUNCTUATION | START_PUNCTUATION | END_PUNCTUATION
                     | CONNECTOR_PUNCTUATION | OTHER_PUNCTUATION
                     | MATH_SYMBOL | CURRENCY_SYMBOL | MODIFIER_SYMBOL
                     | OTHER_SYMBOL 
                     | INITIAL_QUOTE_PUNCTUATION | FINAL_QUOTE_PUNCTUATION

--- return the general category of a 'Char'
generalCategory :: Char -> GeneralCategory
generalCategory ch
    | n >= (UNASSIGNED).ord, n <= (FINAL_QUOTE_PUNCTUATION).ord = from n
    | otherwise = UNASSIGNED 
    where n = C.getType ch

derive Enum         GeneralCategory
derive Bounded      GeneralCategory
derive Show         GeneralCategory
instance PrimitiveArrayElement GeneralCategory where
    pure native javaClass "short.class" :: Class GeneralCategory

--- The ordinal value of a 'Char'
protected ord = C.ord

--- The character with the given ordinal value
--- The argument must be in the range  'Char.minBound' .. 'Char.maxBound', inclusive.
protected chr = C.from

--- Check if the character is a lower case letter
isLower = C.isLowerCase

--- Returns the lowercase letter if the argument is a letter, otherwise the argument unchanged.
toLower = C.toLowerCase

--- Check if the character argument is an uppercase letter.
isUpper = C.isUpperCase

--- Convert the argument to uppercase, or return it unchanged if it is not a letter.
toUpper = C.toUpperCase

--- Convert the argument to uppercase, or return it unchanged if it is not a letter.
toTitle = C.toTitleCase

--- Check if the argument is a (decimal) digit.
isDigit = C.isDigit

--- True if the argument is a space character. 
--- Note that certain characters with Unicode category 'CONTROL' count as space.
isSpace = C.isWhitespace

--- Check if the argument is a control character.
isControl c = generalCategory c == CONTROL

--- Check if the argument is a letter.
isAlpha = C.isLetter

--- Check if the argument is a letter or a digit.
isAlphaNum = C.isLetterOrDigit

--- Check if the argument is in the range \'0\'..\'7\'
isOctDigit c = c >= '0' && c <= '7'

--- Check if the argument is a digit, in the range \'a\'..\'f\' or  in the range \'A\'..\'F\'
isHexDigit c = isDigit c || c >= 'a' && c <= 'f' || c >= 'A' && c <= 'F'

--- Check if the 'generalCategory' of the argument is one of 'NON_SPACING_MARK', 'ENCLOSING_MARK' or 'COMBINING_SPACING_MARK'
isMark c = case generalCategory c of
    NON_SPACING_MARK        →  true
    ENCLOSING_MARK          →  true
    COMBINING_SPACING_MARK  →  true
    _                       →  false

--- Check if the 'generalCategory' of the argument is one of 'DECIMAL_DIGIT_NUMBER', 'LETTER_NUMBER' or 'OTHER_NUMBER'
isNumber c = case generalCategory c of
    DECIMAL_DIGIT_NUMBER    →  true
    LETTER_NUMBER           →  true
    OTHER_NUMBER            →  true
    _                       →  false

{--
    Check if the 'generalCategory' of the argument is one of 'DASH_PUNCTUATION',
    'START_PUNCTUATION', 'END_PUNCTUATION', 'CONNECTOR_PUNCTUATION' or 'OTHER_PUNCTUATION'.
-}
isPunctuation c = case generalCategory c of
    DASH_PUNCTUATION        →  true
    START_PUNCTUATION       →  true
    END_PUNCTUATION         →  true
    CONNECTOR_PUNCTUATION   →  true
    OTHER_PUNCTUATION       →  true
    _                       →  false

{--
    Check if the 'generalCategory' of the argument is one of 'MATH_SYMBOL', 
    'CURRENCY_SYMBOL', 'MODIFIER_SYMBOL' or 'OTHER_SYMBOL'.
-}
isSymbol c = case generalCategory c of
    MATH_SYMBOL         →  true
    CURRENCY_SYMBOL     →  true
    MODIFIER_SYMBOL     →  true
    OTHER_SYMBOL        →  true
    _                   →  false

{--
    Check if the 'generalCategory' of the argument is one of 'SPACE_SEPARATOR',
    'LINE_SEPARATOR' or 'PARAGRAPH_SEPARATOR'.
-}
isSeparator c = case generalCategory c of
    SPACE_SEPARATOR     →  true
    LINE_SEPARATOR      →  true
    PARAGRAPH_SEPARATOR →  true
    _                   →  false

--- Check if a character is printable.
--- True for a defined character that is neither a surrogate character nor a control character.  
isPrint ∷ Char → Bool
isPrint c = c.isDefined && not c.isSurrogate && not (isControl c)

--- Check if the argument is from the ASCII character set.
isAscii c = ord c < 128

--- Check if the argument is from the Latin1 character set.
isLatin1 c = ord c < 256

--- Check if the argument is an ASCII upper case letter.
isAsciiUpper c = c >= 'A' && c <= 'Z'

--- Check if the argument is an ASCII lower case letter.
isAsciiLower c = c >= 'a' && c <= 'z'

--- Convert a hexadecimal digit to its 'Int' value.
--- For different number bases, see 'Char.digit'
digitToInt c = Char.digit c 16

--- Convert an 'Int' in the range 0..15 to the corresponding hexadecimal digit.
--- For other number bases than 16, see 'Char.forDigit'
intToDigit d = Char.forDigit d 16

isLetter :: Char -> Bool
isLetter c = case generalCategory c of
    UPPERCASE_LETTER        -> True
    LOWERCASE_LETTER        -> True
    TITLECASE_LETTER        -> True
    MODIFIER_LETTER         -> True
    OTHER_LETTER            -> True
    _                       -> False

isDec :: Char -> Bool
isDec c = c >= '0' && c <= '9'

protectEsc :: (Char -> Bool) -> ShowS -> ShowS
protectEsc p f = f . cont . unpacked
    where
        cont (s@(c:_)) | p c       = "\\&" ++ packed s
        cont s                     = packed s

asciiTab :: [String]
asciiTab = -- Using an array drags in the array module.  listArray ('\NUL', ' ')
           ["NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "BEL",
            "BS",  "HT",  "LF",  "VT",  "FF",  "CR",  "SO",  "SI",
            "DLE", "DC1", "DC2", "DC3", "DC4", "NAK", "SYN", "ETB",
            "CAN", "EM",  "SUB", "ESC", "FS",  "GS",  "RS",  "US",
            "SP"]

showLitChar :: Char -> ShowS
showLitChar c s | c > '\u007F' =  showChar '\\' (protectEsc isDec (shows (ord c)) s)
showLitChar '\u007F'         s =  showString "\\DEL" s
showLitChar '\\'             s =  showString "\\\\" s
showLitChar c s | c >= ' '     =  showChar c s
showLitChar '\u0007'         s =  showString "\\a" s
showLitChar '\b'             s =  showString "\\b" s
showLitChar '\f'             s =  showString "\\f" s
showLitChar '\n'             s =  showString "\\n" s
showLitChar '\r'             s =  showString "\\r" s
showLitChar '\t'             s =  showString "\\t" s
showLitChar '\u000B'         s =  showString "\\v" s
showLitChar '\u000E'         s =  protectEsc (== 'H') (showString "\\SO") s
showLitChar c                s =  showString (packed $ '\\' : (unpacked $ asciiTab!!(ord c))) s